# =============================================================================
# FILE 1. DATA PREP PIPELINE — CITIZEN SURVEYS
# =============================================================================
# PURPOSE
#   Clean, harmonize, and export citizen survey datasets (2023 & 2024) for downstream analysis.
#
# REPLICATION / RUN NOTES
#   1) Install required packages listed in Section 0.
#   2) Set the working directory in Section 1 to the folder that contains the input data files.
#   3) Ensure any prerequisite .do files referenced in comments have been run to generate .dta inputs.
#   4) Run this script top-to-bottom in a fresh R session.
#
# INPUT FILES (relative to working directory)
#   - Data_Citizens_2023.sav
#   - Data_Citizens_2024.sav
#   - cf_flanders_2023.dta
#   - cf_flanders_2024.dta
#
# OUTPUT FILES (written to working directory)
#   - cf_flanders_2023_data.dta
#   - cf_flanders_2023_data.rds
#   - cf_flanders_2024_data.dta
#   - cf_flanders_2024_data.rds
#   - cf_flanders_data.dta
#   - cf_flanders_data.rds
# =============================================================================

################################################################################
## 0. PACKAGES
################################################################################

# Attach every package whose functions are called below without a namespace
# prefix, so the script really can be run top-to-bottom in a fresh R session.
# Install them once beforehand with install.packages() if they are missing.
library(haven)    # read_sav()
library(foreign)  # read.dta()
library(dplyr)    # %>%, mutate(), select(), row_number()
library(naniar)   # replace_with_na()

################################################################################
## 1. WORKING DIRECTORY + FILE LOCATIONS
################################################################################

# Print the current working directory for reference before it is changed
getwd()

# Set project directory (update it to your own directory that contains the input files)
setwd("updated_path_here")

################################################################################
## 2. CITIZEN SURVEYS
################################################################################

################################################################################
## 2.1. LOAD 2023 DATA
################################################################################

# --- Data ingest: 2023 citizen survey (SPSS) ---
# Data_Citizens_2023.sav must be present in the working directory.
# Read the survey and number the rows in file order; `id` is the merge key below.
cf.flanders.2023.data <- dplyr::mutate(
  read_sav("Data_Citizens_2023.sav"),
  id = row_number()
)

# Lowercase all column names (2023)
names(cf.flanders.2023.data) <- tolower(names(cf.flanders.2023.data))

# --- Auxiliary merge: year-of-birth labels (2023) ---
# The year-of-birth label (q4_label) is taken from the Stata file, not the .sav.
# Run "cf_flanders_2023.do" first so that cf_flanders_2023.dta exists.
# Rows of the two files are paired purely by position (row number), so both
# files are assumed to list respondents in the same order.
cf.flanders.2023.yob <- read.dta("cf_flanders_2023.dta") %>%
  dplyr::select(q4_label) %>%
  dplyr::mutate(id = row_number())

# Inner join on the row id: only ids present in both inputs are kept
cf.flanders.2023.data <- merge(
  x = cf.flanders.2023.data,
  y = cf.flanders.2023.yob,
  by = "id",
  all = FALSE
)

################################################################################
## 2.2. LOAD 2024 DATA
################################################################################

# Load data (2024)
# Read the SPSS file and number the rows in file order; `id` is the merge key
# below. Unlike 2023, column names keep their original capitalisation.
cf.flanders.2024.data <- dplyr::mutate(
  read_sav("Data_Citizens_2024.sav"),
  id = row_number()
)

# Merge with year of birth data (2024) -- Run "cf_flanders_2024.do" first
# Rows of the two files are paired purely by position (row number), so both
# files are assumed to list respondents in the same order.
cf.flanders.2024.yob <- read.dta("cf_flanders_2024.dta") %>%
  dplyr::select("_v3_label") %>%
  dplyr::mutate(id = row_number())

# Inner join on the row id: only ids present in both inputs are kept
cf.flanders.2024.data <- merge(
  x = cf.flanders.2024.data,
  y = cf.flanders.2024.yob,
  by = "id",
  all = FALSE
)

################################################################################
## 2.3. DEFINE NEW VARIABLES
################################################################################

################################################################################
## 2.3.1. SEAT EXPECTATIONS
################################################################################

# Seat expectations (2023)
# Copies items q24_1..q24_7 into party-named columns, turns the -99 code into
# NA, and converts the remaining codes into a three-level factor.
cf.flanders.2023.data <- local({
  survey <- cf.flanders.2023.data

  # Survey item holding each party's seat expectation
  seat_items <- c(pvda = "q24_1", groen = "q24_2", vooruit = "q24_3",
                  cdv = "q24_4", ovld = "q24_5", nva = "q24_6", vb = "q24_7")
  seat_cols <- paste0("seats_", names(seat_items))

  for (k in seq_along(seat_items)) {
    survey[[seat_cols[k]]] <- survey[[seat_items[[k]]]]
  }

  # Seat expectations: Remove -99 values (2023)
  survey <- replace_with_na(
    survey,
    replace = setNames(rep(list(-99), length(seat_cols)), seat_cols)
  )

  # Seat expectations: Add labels to answer categories (2023)
  # Code 13 = "Fewer", 12 = "Same", 1 = "More"; any other code becomes NA.
  for (col in seat_cols) {
    survey[[col]] <- factor(survey[[col]],
                            levels = c(13, 12, 1),
                            labels = c("Fewer", "Same", "More"))
  }

  survey
})

# Seat expectations (2024)
# Copies the 2024 items (Q24_4..Q24_11, skipping Q24_8) into party-named
# columns, turns the -99 code into NA, and converts the remaining codes into a
# three-level factor.
cf.flanders.2024.data <- local({
  survey <- cf.flanders.2024.data

  # Survey item holding each party's seat expectation
  seat_items <- c(pvda = "Q24_4", groen = "Q24_5", vooruit = "Q24_6",
                  cdv = "Q24_7", ovld = "Q24_9", nva = "Q24_10", vb = "Q24_11")
  seat_cols <- paste0("seats_", names(seat_items))

  for (k in seq_along(seat_items)) {
    survey[[seat_cols[k]]] <- survey[[seat_items[[k]]]]
  }

  # Seat expectations: Remove -99 values (2024)
  survey <- replace_with_na(
    survey,
    replace = setNames(rep(list(-99), length(seat_cols)), seat_cols)
  )

  # Seat expectations: Add labels to answer categories (2024)
  # Code 13 = "Fewer", 12 = "Same", 1 = "More"; any other code becomes NA.
  for (col in seat_cols) {
    survey[[col]] <- factor(survey[[col]],
                            levels = c(13, 12, 1),
                            labels = c("Fewer", "Same", "More"))
  }

  survey
})

################################################################################
## 2.3.2. CORRECT SEAT EXPECTATIONS
################################################################################

# Correct seat expectations (2023)
# For every party, seats_<party>_correct is 1 when the respondent gave the
# answer listed below, 0 for either of the other two answers, and NA when the
# seat expectation itself is missing.
cf.flanders.2023.data <- local({
  survey <- cf.flanders.2023.data

  # Answer scored as correct: "More" for PVDA, Vooruit and Vlaams Belang,
  # "Fewer" for Groen, CD&V, Open Vld and N-VA
  correct_answer <- c(pvda = "More", groen = "Fewer", vooruit = "More",
                      cdv = "Fewer", ovld = "Fewer", nva = "Fewer", vb = "More")

  for (party in names(correct_answer)) {
    expectation <- survey[[paste0("seats_", party)]]
    survey[[paste0("seats_", party, "_correct")]] <-
      ifelse(expectation == correct_answer[[party]], 1, 0)
  }

  survey
})

# Correct seat expectations (2024)
# For every party, seats_<party>_correct is 1 when the respondent gave the
# answer listed below, 0 for either of the other two answers, and NA when the
# seat expectation itself is missing.
cf.flanders.2024.data <- local({
  survey <- cf.flanders.2024.data

  # Answer scored as correct: "More" for PVDA, Vooruit and Vlaams Belang,
  # "Fewer" for Groen, CD&V, Open Vld and N-VA
  correct_answer <- c(pvda = "More", groen = "Fewer", vooruit = "More",
                      cdv = "Fewer", ovld = "Fewer", nva = "Fewer", vb = "More")

  for (party in names(correct_answer)) {
    expectation <- survey[[paste0("seats_", party)]]
    survey[[paste0("seats_", party, "_correct")]] <-
      ifelse(expectation == correct_answer[[party]], 1, 0)
  }

  survey
})

################################################################################
## 2.3.3. SEAT FORECASTS ACCURACY
################################################################################

# Seat forecasts accuracy (2023)
# Sum of the seven 0/1 "correct" indicators, i.e. the number of parties whose
# seat change was forecast correctly. The sum is NA as soon as one of the seven
# indicators is NA.
cf.flanders.2023.data$seats_accuracy <- Reduce(
  `+`,
  cf.flanders.2023.data[paste0(
    "seats_",
    c("pvda", "groen", "vooruit", "cdv", "ovld", "nva", "vb"),
    "_correct"
  )]
)

# Seat forecasts accuracy (2024)
# Same construction as for 2023.
cf.flanders.2024.data$seats_accuracy <- Reduce(
  `+`,
  cf.flanders.2024.data[paste0(
    "seats_",
    c("pvda", "groen", "vooruit", "cdv", "ovld", "nva", "vb"),
    "_correct"
  )]
)

################################################################################
## 2.3.4. GOVERNMENT EXPECTATIONS
################################################################################

# Government expectations (2023)
# Copies items q25_1..q25_7 into party-named columns, turns the -99 code into
# NA, and shifts every answer down by one.
cf.flanders.2023.data <- local({
  survey <- cf.flanders.2023.data

  # Survey item holding the government expectation for each party
  govt_items <- c(pvda = "q25_1", groen = "q25_2", vooruit = "q25_3",
                  cdv = "q25_4", ovld = "q25_5", nva = "q25_6", vb = "q25_7")
  govt_cols <- paste0("govt_", names(govt_items))

  for (k in seq_along(govt_items)) {
    survey[[govt_cols[k]]] <- survey[[govt_items[[k]]]]
  }

  # Government expectations: Remove -99 values (2023)
  survey <- replace_with_na(
    survey,
    replace = setNames(rep(list(-99), length(govt_cols)), govt_cols)
  )

  # Government expectations: Rescale from 0 to 10 (2023)
  # NOTE(review): subtracting 1 yields 0-10 only if the raw item is coded
  # 1-11 -- confirm against the questionnaire.
  for (col in govt_cols) {
    survey[[col]] <- survey[[col]] - 1
  }

  survey
})

# Government expectations (2024)
# Copies the 2024 items (Q25_4..Q25_11, skipping Q25_8) into party-named
# columns, turns the -99 code into NA, and shifts every answer down by one.
cf.flanders.2024.data <- local({
  survey <- cf.flanders.2024.data

  # Survey item holding the government expectation for each party
  govt_items <- c(pvda = "Q25_4", groen = "Q25_5", vooruit = "Q25_6",
                  cdv = "Q25_7", ovld = "Q25_9", nva = "Q25_10", vb = "Q25_11")
  govt_cols <- paste0("govt_", names(govt_items))

  for (k in seq_along(govt_items)) {
    survey[[govt_cols[k]]] <- survey[[govt_items[[k]]]]
  }

  # Government expectations: Remove -99 values (2024)
  survey <- replace_with_na(
    survey,
    replace = setNames(rep(list(-99), length(govt_cols)), govt_cols)
  )

  # Government expectations: Rescale from 0 to 10 (2024)
  # NOTE(review): subtracting 1 yields 0-10 only if the raw item is coded
  # 1-11 -- confirm against the questionnaire.
  for (col in govt_cols) {
    survey[[col]] <- survey[[col]] - 1
  }

  survey
})

################################################################################
## 2.3.5. GOVERNMENT FORECASTS ACCURACY
################################################################################

# Coalition forecasts accuracy (2023)
# Raw score: the ratings for N-VA, Vooruit and CD&V are added and the ratings
# for PVDA, Groen, Open Vld and Vlaams Belang are subtracted (NA if any rating
# is missing). The score is then min-max rescaled to 0-1 using the smallest and
# largest raw score observed in the 2023 data.
cf.flanders.2023.data$govt_accuracy <- local({
  raw_score <- with(
    cf.flanders.2023.data,
    govt_nva + govt_vooruit + govt_cdv - govt_pvda - govt_groen - govt_ovld - govt_vb
  )
  lowest <- min(raw_score, na.rm = TRUE)
  highest <- max(raw_score, na.rm = TRUE)
  (raw_score - lowest) / (highest - lowest)
})

# Coalition forecasts accuracy (2024)
# Same construction, rescaled with the 2024 sample's own minimum and maximum.
cf.flanders.2024.data$govt_accuracy <- local({
  raw_score <- with(
    cf.flanders.2024.data,
    govt_nva + govt_vooruit + govt_cdv - govt_pvda - govt_groen - govt_ovld - govt_vb
  )
  lowest <- min(raw_score, na.rm = TRUE)
  highest <- max(raw_score, na.rm = TRUE)
  (raw_score - lowest) / (highest - lowest)
})

################################################################################
## 2.3.6. ACTUAL SEAT OUTCOME
################################################################################

# Actual seat outcome (2023)
# Adds one constant column per party: 1 for PVDA, Vooruit and Vlaams Belang,
# 0 for the other four parties.
# NOTE(review): presumably 1 = the party gained seats -- confirm.
cf.flanders.2023.data <- local({
  survey <- cf.flanders.2023.data
  seat_result <- c(pvda = 1, groen = 0, vooruit = 1, cdv = 0,
                   ovld = 0, nva = 0, vb = 1)
  for (party in names(seat_result)) {
    survey[[paste0("seats_", party, "_outcome")]] <- seat_result[[party]]
  }
  survey
})

# Actual seat outcome (2024)
# Same constants as for 2023.
cf.flanders.2024.data <- local({
  survey <- cf.flanders.2024.data
  seat_result <- c(pvda = 1, groen = 0, vooruit = 1, cdv = 0,
                   ovld = 0, nva = 0, vb = 1)
  for (party in names(seat_result)) {
    survey[[paste0("seats_", party, "_outcome")]] <- seat_result[[party]]
  }
  survey
})

################################################################################
## 2.3.7. ACTUAL COALITION OUTCOME
################################################################################

# Actual coalition outcome (2023)
# Adds one constant column per party: 1 for Vooruit, CD&V and N-VA,
# 0 for the other four parties.
# NOTE(review): presumably 1 = the party joined the government -- confirm.
cf.flanders.2023.data <- local({
  survey <- cf.flanders.2023.data
  coalition_result <- c(pvda = 0, groen = 0, vooruit = 1, cdv = 1,
                        ovld = 0, nva = 1, vb = 0)
  for (party in names(coalition_result)) {
    survey[[paste0("govt_", party, "_outcome")]] <- coalition_result[[party]]
  }
  survey
})

# Actual coalition outcome (2024)
# Same constants as for 2023.
cf.flanders.2024.data <- local({
  survey <- cf.flanders.2024.data
  coalition_result <- c(pvda = 0, groen = 0, vooruit = 1, cdv = 1,
                        ovld = 0, nva = 1, vb = 0)
  for (party in names(coalition_result)) {
    survey[[paste0("govt_", party, "_outcome")]] <- coalition_result[[party]]
  }
  survey
})

################################################################################
## 2.3.7. BRIER SCORES FOR GOVERNMENT PROBABILITY
################################################################################

# Brier scores (2023)
# Per party: 1 - (rating / 10 - outcome)^2, where the outcome is 1 for Vooruit,
# CD&V and N-VA and 0 for the other parties. Because the squared error is
# subtracted from 1, higher values mean a better forecast. govt_brier is the
# sum of the seven party scores (NA if any of them is NA).
cf.flanders.2023.data <- local({
  survey <- cf.flanders.2023.data
  in_coalition <- c(pvda = 0, groen = 0, vooruit = 1, cdv = 1,
                    ovld = 0, nva = 1, vb = 0)

  for (party in names(in_coalition)) {
    forecast <- survey[[paste0("govt_", party)]] / 10
    survey[[paste0("govt_", party, "_brier")]] <-
      1 - ((forecast - in_coalition[[party]])^2)
  }

  survey$govt_brier <- Reduce(
    `+`,
    survey[paste0("govt_", names(in_coalition), "_brier")]
  )
  survey
})

# Brier scores (2024)
# Same construction as for 2023.
cf.flanders.2024.data <- local({
  survey <- cf.flanders.2024.data
  in_coalition <- c(pvda = 0, groen = 0, vooruit = 1, cdv = 1,
                    ovld = 0, nva = 1, vb = 0)

  for (party in names(in_coalition)) {
    forecast <- survey[[paste0("govt_", party)]] / 10
    survey[[paste0("govt_", party, "_brier")]] <-
      1 - ((forecast - in_coalition[[party]])^2)
  }

  survey$govt_brier <- Reduce(
    `+`,
    survey[paste0("govt_", names(in_coalition), "_brier")]
  )
  survey
})

################################################################################
## 2.3.8. PROVINCE OF RESIDENCE
################################################################################

# Revise incorrect postal code (2023)
# Two free-text entries are corrected by hand; any remaining letters are then
# stripped from every answer. q5 is a character column after this step.
cf.flanders.2023.data$q5[cf.flanders.2023.data$q5 == "362o"] <- "3620"
cf.flanders.2023.data$q5[cf.flanders.2023.data$q5 == "Kortemark"] <- "8610"
cf.flanders.2023.data$q5 <- gsub("[a-zA-Z]", "", cf.flanders.2023.data$q5)

# Province of residence (2023)
# Respondents whose postal code is missing, not a number, or outside every
# range below keep the empty string "" as province.
cf.flanders.2023.data$province <- local({
  # Compare postal codes as numbers. Comparing the character column directly
  # with 2000, 2999, ... would be a lexicographic string comparison, which
  # misclassifies codes that are not exactly four digits (e.g. "25" sorts
  # between "2000" and "2999"). Entries that are not numbers become NA here
  # and therefore match no range.
  postcode <- suppressWarnings(as.numeric(cf.flanders.2023.data$q5))

  postcode_ranges <- data.frame(
    lower = c(2000, 9000, 1500, 3000, 3500, 8000,   # Flanders
              1000, 7000, 6000, 4000, 5000, 1300),  # Brussels and Wallonia
    upper = c(2999, 9999, 1999, 3499, 3999, 8999,
              1299, 7999, 6999, 4999, 5999, 1499),
    label = c("Antwerpen", "Oost-Vlaanderen", "Vlaams-Brabant",
              "Vlaams-Brabant", "Limburg", "West-Vlaanderen",
              "Brussels", "Hainaut (West)", "Hainaut (East)/Luxembourg",
              "Liège", "Namur", "Walloon Brabant"),
    stringsAsFactors = FALSE
  )

  province <- rep("", length(postcode))
  for (r in seq_len(nrow(postcode_ranges))) {
    in_range <- which(postcode >= postcode_ranges$lower[r] &
                        postcode <= postcode_ranges$upper[r])
    province[in_range] <- postcode_ranges$label[r]
  }
  province
})

# Province of residence: frequency table (2023)
# Reference: Appendix - Table A1
freq_table <- table(cf.flanders.2023.data$province)
percentages <- prop.table(freq_table) * 100
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.9. SEX
################################################################################

# Sex (2023)
# Recode q3: 1 stays 1 ("Male"), 2 becomes 0 ("Female"), 3 and -99 become NA.
# Any code other than 0/1 left after the recode is dropped to NA by factor().
cf.flanders.2023.data$sex <- local({
  answer <- cf.flanders.2023.data$q3
  recoded <- answer
  recoded[answer == 3 | answer == -99] <- NA
  recoded[answer == 2] <- 0

  # Sex: Add labels to answer categories (2023)
  factor(recoded, levels = c(0, 1), labels = c("Female", "Male"))
})

# Sex: frequency table (2023)
# Reference: Appendix - Table A1
# Tabulates the raw item q3, so all original answer codes are shown.
freq_table <- table(cf.flanders.2023.data$q3)
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

# Sex (2024)
# Recode Q2.2: 1 stays 1 ("Male"), 2 becomes 0 ("Female"), 3 and -99 become NA.
# Any code other than 0/1 left after the recode is dropped to NA by factor().
cf.flanders.2024.data$sex <- local({
  answer <- cf.flanders.2024.data$Q2.2
  recoded <- answer
  recoded[answer == 3 | answer == -99] <- NA
  recoded[answer == 2] <- 0

  # Sex: Add labels to answer categories (2024)
  factor(recoded, levels = c(0, 1), labels = c("Female", "Male"))
})

# Sex: frequency table (2024)
# Reference: Appendix - Table A1
# Tabulates the raw item Q2.2, so all original answer codes are shown.
freq_table <- table(cf.flanders.2024.data$Q2.2)
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.10. AGE
################################################################################

# Age (2023)
# Year of birth comes from the merged q4_label column; age is the difference
# to the survey year. Ages below 18 are set to NA.
cf.flanders.2023.data$yob <- as.numeric(cf.flanders.2023.data$q4_label)
cf.flanders.2023.data$age <- 2023 - cf.flanders.2023.data$yob
cf.flanders.2023.data$age[cf.flanders.2023.data$age < 18] <- NA

# Age group (2023)
# Respondents with a missing age keep the empty string "" as age group.
# The middle band covers ages 35-54 and is labelled "35-54", the same label
# the 2024 data uses, so the categories line up when both years are combined.
cf.flanders.2023.data$agegroup <- ""
cf.flanders.2023.data$agegroup[cf.flanders.2023.data$age >= 18 & cf.flanders.2023.data$age <= 34] <- "18-34"
cf.flanders.2023.data$agegroup[cf.flanders.2023.data$age >= 35 & cf.flanders.2023.data$age <= 54] <- "35-54"
cf.flanders.2023.data$agegroup[cf.flanders.2023.data$age >= 55] <- "55+"

# Age group: frequency table (2023)
# Reference: Appendix - Table A1
freq_table <- table(cf.flanders.2023.data$agegroup)
percentages <- prop.table(freq_table) * 100
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

# Age (2024)
# Year of birth comes from the merged "_v3_label" column; age is the difference
# to the survey year. Ages below 18 are set to NA.
cf.flanders.2024.data$yob <- as.numeric(cf.flanders.2024.data[["_v3_label"]])
cf.flanders.2024.data$age <- local({
  years_old <- 2024 - cf.flanders.2024.data$yob
  replace(years_old, which(years_old < 18), NA)
})

# Age group (2024)
# Respondents with a missing age keep the empty string "" as age group.
cf.flanders.2024.data$agegroup <- local({
  years_old <- cf.flanders.2024.data$age
  band <- rep("", length(years_old))
  band[which(years_old >= 18 & years_old <= 34)] <- "18-34"
  band[which(years_old >= 35 & years_old <= 54)] <- "35-54"
  band[which(years_old >= 55)] <- "55+"
  band
})

# Age group: frequency table (2024)
# Reference: Appendix - Table A1
freq_table <- table(cf.flanders.2024.data$agegroup)
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.11. VOTE INTENTION
################################################################################

# Vote intention (2023)
# Builds a character column `vote` from the q12 answer codes plus one 0/1
# indicator column per answer option.
#   - vote: party label for the codes listed below; NA for every other code
#     (including -99) and for missing answers.
#   - vote_<x>: 1 when q12 equals that option's code, 0 for any other
#     non-missing code (so -99 counts as 0), NA when q12 is missing.
cf.flanders.2023.data <- local({
  survey <- cf.flanders.2023.data
  answer <- survey$q12

  # Answer code, label and indicator-column suffix of each option
  option_code <- c(1, 2, 3, 4, 5, 6, 7, 11, 12, 13)
  option_label <- c("Groen", "Vooruit", "CD&V", "Open Vld", "N-VA",
                    "Vlaams Belang", "PVDA", "Other", "None", "Don't know")
  option_suffix <- c("groen", "vooruit", "cdv", "ovld", "nva",
                     "vb", "pvda", "other", "none", "dk")

  party <- rep(NA, nrow(survey))
  for (k in seq_along(option_code)) {
    party[answer == option_code[k]] <- option_label[k]
  }
  survey$vote <- as.character(party)

  # Vote intention: one indicator per answer option (2023)
  for (k in seq_along(option_code)) {
    survey[[paste0("vote_", option_suffix[k])]] <-
      as.numeric(answer == option_code[k])
  }

  survey
})

# Vote intention (2024)
# Translate the Q15.4 answer codes into party labels (character column).
# Codes without a label below -- including -99 and 10 ("Cannot") -- and
# missing answers end up as NA.
cf.flanders.2024.data <- cf.flanders.2024.data %>%
  dplyr::mutate(
    vote = dplyr::case_when(
      Q15.4 == 1  ~ "CD&V",
      Q15.4 == 2  ~ "Groen",
      Q15.4 == 3  ~ "N-VA",
      Q15.4 == 4  ~ "Open Vld",
      Q15.4 == 5  ~ "PVDA",
      Q15.4 == 6  ~ "Vooruit",
      Q15.4 == 7  ~ "Vlaams Belang",
      Q15.4 == 8  ~ "Other",
      Q15.4 == 9  ~ "None",
      Q15.4 == 11 ~ "Don't know",
      TRUE        ~ NA_character_
    )
  )

# Vote intention dummies (2024)
# One indicator per Q15.4 answer category: 1 when the respondent picked that
# category, 0 for every other non-missing Q15.4 value, NA when Q15.4 is NA.
# NOTE(review): -99 and code 10 ("Cannot") are set to NA in `vote` but are
# coded 0 on every dummy here -- confirm this is intended.
cf.flanders.2024.data <- cf.flanders.2024.data %>%
  dplyr::mutate(
    vote_groen   = as.numeric(Q15.4 == 2),   # Groen
    vote_vooruit = as.numeric(Q15.4 == 6),   # Vooruit
    vote_cdv     = as.numeric(Q15.4 == 1),   # CD&V
    vote_ovld    = as.numeric(Q15.4 == 4),   # Open Vld
    vote_nva     = as.numeric(Q15.4 == 3),   # N-VA
    vote_vb      = as.numeric(Q15.4 == 7),   # Vlaams Belang
    vote_pvda    = as.numeric(Q15.4 == 5),   # PVDA
    vote_other   = as.numeric(Q15.4 == 8),   # Other
    vote_none    = as.numeric(Q15.4 == 9),   # None
    vote_dk      = as.numeric(Q15.4 == 11)   # Don't know
  )

################################################################################
## 2.3.12. SEAT WINNERS
################################################################################

# Seat winners (2023)
# 1 = respondent intends to vote for a party coded as a seat winner,
# 0 = party coded as a seat loser, NA = any other q12 value (incl. -99 / NA).
# q12 party codes: 1 Groen, 2 Vooruit, 3 CD&V, 4 Open Vld, 5 N-VA,
# 6 Vlaams Belang, 7 PVDA.
# NOTE(review): codes 8-10 are recoded here, whereas the vote recode uses
# 11 (Other), 12 (None) and 13 (Don't know); those three codes stay NA in
# this variable -- confirm against the 2023 codebook.
cf.flanders.2023.data <- cf.flanders.2023.data %>%
  dplyr::mutate(
    seats_winner = dplyr::case_when(
      q12 == 2 | q12 == 6 | q12 == 7 | q12 == 8 ~ 1,
      q12 == 1 | q12 == 3 | q12 == 4 | q12 == 5 | q12 == 9 | q12 == 10 ~ 0,
      TRUE ~ NA_real_
    )
  )

# Seat winners (2024)
# 1 = respondent intends to vote for a party coded as a seat winner,
# 0 = seat loser / other / none / don't know, NA = -99, 10 ("Cannot") or NA.
#
# Q15.4 uses different party codes than the 2023 item q12
# (1 CD&V, 2 Groen, 3 N-VA, 4 Open Vld, 5 PVDA, 6 Vooruit, 7 Vlaams Belang),
# so the winner flag is assigned per party, not per code number. The
# party-level classification is the one used in the 2023 recode:
# winners = Vooruit, Vlaams Belang, PVDA; losers = Groen, CD&V, Open Vld, N-VA.
# The earlier version reused the 2023 code numbers unchanged, which flagged
# Groen (2) as a winner and PVDA (5) as a loser.
cf.flanders.2024.data <- cf.flanders.2024.data %>%
  dplyr::mutate(
    seats_winner = dplyr::case_when(
      # PVDA, Vooruit, Vlaams Belang
      Q15.4 == 5 | Q15.4 == 6 | Q15.4 == 7 ~ 1,
      # CD&V, Groen, N-VA, Open Vld
      Q15.4 == 1 | Q15.4 == 2 | Q15.4 == 3 | Q15.4 == 4 ~ 0,
      # Other, None, Don't know
      Q15.4 == 8 | Q15.4 == 9 | Q15.4 == 11 ~ 0,
      # -99, 10 ("Cannot") and missing answers
      TRUE ~ NA_real_
    )
  )

################################################################################
## 2.3.13. COALITION WINNERS
################################################################################

# Coalition winners (2023)
# 1 = respondent intends to vote for a party coded as a coalition winner,
# 0 = party coded as a coalition loser, NA = any other q12 value
# (incl. -99 / NA).
# q12 party codes: 1 Groen, 2 Vooruit, 3 CD&V, 4 Open Vld, 5 N-VA,
# 6 Vlaams Belang, 7 PVDA.
# NOTE(review): codes 8-10 are recoded here, whereas the vote recode uses
# 11 (Other), 12 (None) and 13 (Don't know); those three codes stay NA in
# this variable -- confirm against the 2023 codebook.
cf.flanders.2023.data <- cf.flanders.2023.data %>%
  dplyr::mutate(
    govt_winner = dplyr::case_when(
      q12 == 2 | q12 == 3 | q12 == 5 ~ 1,
      q12 == 1 | q12 == 4 | q12 == 6 | q12 == 7 |
        q12 == 8 | q12 == 9 | q12 == 10 ~ 0,
      TRUE ~ NA_real_
    )
  )

# Coalition winners (2024)
# 1 = respondent intends to vote for a party coded as a coalition winner,
# 0 = coalition loser / other / none / don't know,
# NA = -99, 10 ("Cannot") or NA.
#
# Q15.4 uses different party codes than the 2023 item q12
# (1 CD&V, 2 Groen, 3 N-VA, 4 Open Vld, 5 PVDA, 6 Vooruit, 7 Vlaams Belang),
# so the winner flag is assigned per party, not per code number. The
# party-level classification is the one used in the 2023 recode:
# winners = Vooruit, CD&V, N-VA; losers = Groen, Open Vld, Vlaams Belang, PVDA.
# The earlier version reused the 2023 code numbers unchanged, which flagged
# Groen (2) and PVDA (5) as winners and CD&V (1) and Vooruit (6) as losers.
cf.flanders.2024.data <- cf.flanders.2024.data %>%
  dplyr::mutate(
    govt_winner = dplyr::case_when(
      # CD&V, N-VA, Vooruit
      Q15.4 == 1 | Q15.4 == 3 | Q15.4 == 6 ~ 1,
      # Groen, Open Vld, PVDA, Vlaams Belang
      Q15.4 == 2 | Q15.4 == 4 | Q15.4 == 5 | Q15.4 == 7 ~ 0,
      # Other, None, Don't know
      Q15.4 == 8 | Q15.4 == 9 | Q15.4 == 11 ~ 0,
      # -99, 10 ("Cannot") and missing answers
      TRUE ~ NA_real_
    )
  )

################################################################################
## 2.3.14. LEFT-RIGHT IDEOLOGY
################################################################################

# Left-right ideology (2023)
# Copy q14, blank out the -99 code, then shift the scale down by one point.
cf.flanders.2023.data$ideology <- replace(
  cf.flanders.2023.data$q14,
  cf.flanders.2023.data$q14 == -99,
  NA
) - 1

# Left-right ideology (2024)
# Copy Q2.5_1 and blank out the -99 code.
# NOTE(review): unlike 2023, no shift by one is applied here -- presumably
# Q2.5_1 is already on the target scale; confirm against the codebooks.
cf.flanders.2024.data$ideology <- replace(
  cf.flanders.2024.data$Q2.5_1,
  cf.flanders.2024.data$Q2.5_1 == -99,
  NA
)

################################################################################
## 2.3.14. POLITICAL INTEREST
################################################################################

# Political interest (2023 only)
# Both variables are built straight from q22. Any q22 value that is not one
# of the listed levels (including the -99 code) becomes NA.
cf.flanders.2023.data <- cf.flanders.2023.data %>%
  dplyr::mutate(
    # Five-category version with value labels
    interest = factor(
      q22,
      levels = 1:5,
      labels = c(
        "Not interested at all",
        "Not interested",
        "A little interested",
        "Interested",
        "Very interested"
      )
    ),
    # Three-category version: 1-2 -> "Not interested", 3 -> "A little",
    # 4-5 -> "Interested". Repeating a label merges the corresponding levels.
    interest3 = factor(
      q22,
      levels = 1:5,
      labels = c(
        "Not interested",
        "Not interested",
        "A little",
        "Interested",
        "Interested"
      )
    )
  )

################################################################################
## 2.3.15. ATTENTION PAID TO CAMPAIGN
################################################################################

# Attention paid to campaign (2024 only)
# Q15.7 is reverse-coded so that higher factor codes mean closer attention:
# raw 4 -> level 1 ("Not closely at all") ... raw 1 -> level 4 ("Very closely").
# The reversal is expressed through the order of `levels`; any other value
# (including the -99 code) becomes NA.
cf.flanders.2024.data$follow <- factor(
  cf.flanders.2024.data$Q15.7,
  levels = c(4, 3, 2, 1),
  labels = c(
    "Not closely at all",
    "Not so closely",
    "Closely",
    "Very closely"
  )
)

################################################################################
## 2.3.16. EDUCATION
################################################################################

# Education (2024 only)
# All three variables are built straight from Q2.4 (codes 1-8). Any other
# value (including the -99 code) becomes NA. Repeating a label in `labels`
# merges the corresponding raw codes into one factor level.
cf.flanders.2024.data <- cf.flanders.2024.data %>%
  dplyr::mutate(
    # Full eight-category version with value labels
    education = factor(
      Q2.4,
      levels = 1:8,
      labels = c(
        "Primary education",
        "Lower secondary education",
        "Upper secondary education",
        "Graduate education",
        "Professional bachelor's degree",
        "Academic bachelor's degree",
        "Master's degree",
        "Doctoral degree"
      )
    ),
    # University degree: codes 1-5 -> "No university", 6-8 -> "University"
    university = factor(
      Q2.4,
      levels = 1:8,
      labels = c(rep("No university", 5), rep("University", 3))
    ),
    # Three-category version: 1-3 -> "Low", 4-5 -> "Moderate", 6-8 -> "High"
    education3 = factor(
      Q2.4,
      levels = 1:8,
      labels = c(rep("Low", 3), rep("Moderate", 2), rep("High", 3))
    )
  )

# Education: Create 5-category variable (2024 only)
# Q2.4 itself is converted to a plain numeric column first.
cf.flanders.2024.data$Q2.4 <- as.numeric(cf.flanders.2024.data$Q2.4)

# Character labels: 1, 2 and 3 keep their own category, 4-5 and 6-8 are
# pooled. -99 becomes NA; any other unexpected code is kept as text.
cf.flanders.2024.data <- cf.flanders.2024.data %>%
  dplyr::mutate(
    education5 = dplyr::case_when(
      Q2.4 == -99 ~ NA_character_,
      Q2.4 == 1 ~ "Primary or less",
      Q2.4 == 2 ~ "Lower secondary",
      Q2.4 == 3 ~ "Upper secondary",
      Q2.4 == 4 | Q2.4 == 5 ~ "Higher non-university",
      Q2.4 == 6 | Q2.4 == 7 | Q2.4 == 8 ~ "Higher university",
      TRUE ~ as.character(Q2.4)
    )
  )

# Education: frequency table (2024 only)
# Reference: Appendix - Table A1
# Counts and percentages of the non-missing education5 categories.
freq_table <- table(cf.flanders.2024.data$education5)
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.17. SOPHISTICATION INDEX
################################################################################

# Sophistication index (2024 only)
# Mean of the z-standardised campaign attention (`follow`) and education
# (`education`) variables. Both are factors at this point, so as.numeric()
# yields their integer level codes.
# scale() returns a one-column matrix; as.numeric() stores the z-scores as
# plain numeric vectors instead of matrix columns.
cf.flanders.2024.data$follow_z <- as.numeric(
  scale(as.numeric(cf.flanders.2024.data$follow))
)
cf.flanders.2024.data$education_z <- as.numeric(
  scale(as.numeric(cf.flanders.2024.data$education))
)

# Row-wise mean over the available components: a respondent with only one
# non-missing component gets that component's z-score.
cf.flanders.2024.data$sophistication_index <- rowMeans(
  cbind(cf.flanders.2024.data$follow_z, cf.flanders.2024.data$education_z),
  na.rm = TRUE
)

# With na.rm = TRUE, rowMeans() returns NaN when both components are missing;
# store those cases as regular NA.
cf.flanders.2024.data$sophistication_index[
  is.nan(cf.flanders.2024.data$sophistication_index)
] <- NA_real_

################################################################################
## 2.3.18. SURVEY YEAR
################################################################################

# Survey year: constant identifier of the wave each data frame comes from
cf.flanders.2023.data <- dplyr::mutate(cf.flanders.2023.data, survey = 2023)
cf.flanders.2024.data <- dplyr::mutate(cf.flanders.2024.data, survey = 2024)

################################################################################
## 2.3.19. SURVEY TYPE
################################################################################

# Survey type: constant 0 in both citizen survey waves
cf.flanders.2023.data <- dplyr::mutate(cf.flanders.2023.data, type = 0)
cf.flanders.2024.data <- dplyr::mutate(cf.flanders.2024.data, type = 0)

################################################################################
## 2.4. DEFINE VARIABLE TYPE
################################################################################

# Define variable type (2023)
# vote -> character; vote dummies, seat predictions, interest, sex, survey
# and type -> factor; age, ideology, coalition predictions and seats_accuracy
# -> numeric.
#
# seats_accuracy is converted with as.numeric() only. The earlier version
# first turned it into a factor and then applied as.numeric(), which returns
# the factor's internal level codes (1, 2, ...) instead of the stored values.
cf.flanders.2023.data <- cf.flanders.2023.data %>%
  dplyr::mutate(
    vote = as.character(vote),
    dplyr::across(
      dplyr::all_of(c(
        "vote_pvda", "vote_groen", "vote_vooruit", "vote_ovld", "vote_cdv",
        "vote_nva", "vote_vb", "vote_other", "vote_none", "vote_dk",
        "interest", "sex", "survey", "type",
        "seats_pvda", "seats_groen", "seats_vooruit", "seats_cdv",
        "seats_ovld", "seats_nva", "seats_vb"
      )),
      as.factor
    ),
    dplyr::across(
      dplyr::all_of(c(
        "age", "ideology",
        "govt_pvda", "govt_groen", "govt_vooruit", "govt_cdv",
        "govt_ovld", "govt_nva", "govt_vb",
        "seats_accuracy"
      )),
      as.numeric
    )
  )

# Define variable type (2024)
# vote -> character; vote dummies, seat predictions, follow, sex, survey and
# type -> factor; age, education, ideology, coalition predictions,
# seats_accuracy and sophistication_index -> numeric.
#
# education is a labelled factor at this point; as.numeric() deliberately
# replaces it by its level codes 1-8, which match the factor's `levels`.
#
# seats_accuracy is converted with as.numeric() only. The earlier version
# first turned it into a factor and then applied as.numeric(), which returns
# the factor's internal level codes (1, 2, ...) instead of the stored values.
cf.flanders.2024.data <- cf.flanders.2024.data %>%
  dplyr::mutate(
    vote = as.character(vote),
    dplyr::across(
      dplyr::all_of(c(
        "vote_pvda", "vote_groen", "vote_vooruit", "vote_ovld", "vote_cdv",
        "vote_nva", "vote_vb", "vote_other", "vote_none", "vote_dk",
        "follow", "sex", "survey", "type",
        "seats_pvda", "seats_groen", "seats_vooruit", "seats_cdv",
        "seats_ovld", "seats_nva", "seats_vb"
      )),
      as.factor
    ),
    dplyr::across(
      dplyr::all_of(c(
        "age", "education", "ideology",
        "govt_pvda", "govt_groen", "govt_vooruit", "govt_cdv",
        "govt_ovld", "govt_nva", "govt_vb",
        "seats_accuracy", "sophistication_index"
      )),
      as.numeric
    )
  )

################################################################################
## 2.5. SELECT VARIABLES
################################################################################

# Select variables (2023)
# Keep only the analysis variables, in the column order listed below.
cf.flanders.2023.data <- dplyr::select(
  cf.flanders.2023.data,
  # Seat predictions per party
  seats_pvda, seats_groen, seats_vooruit, seats_cdv, seats_ovld, seats_nva,
  seats_vb,
  # Coalition predictions per party
  govt_pvda, govt_groen, govt_vooruit, govt_cdv, govt_ovld, govt_nva,
  govt_vb,
  # Vote intention dummies
  vote_pvda, vote_groen, vote_vooruit, vote_cdv, vote_ovld, vote_nva,
  vote_vb, vote_other, vote_none, vote_dk,
  # Respondent characteristics and survey identifiers
  interest, ideology, sex, age, survey, type, vote,
  # Winner status and overall accuracy measures
  seats_winner, govt_winner, seats_accuracy, govt_brier,
  # Per-party correctness of seat predictions
  seats_pvda_correct, seats_groen_correct, seats_vooruit_correct,
  seats_cdv_correct, seats_ovld_correct, seats_nva_correct, seats_vb_correct,
  # Per-party Brier scores of coalition predictions
  govt_pvda_brier, govt_groen_brier, govt_vooruit_brier, govt_cdv_brier,
  govt_ovld_brier, govt_nva_brier, govt_vb_brier
)

# Select variables (2024)
# Keep only the analysis variables, in the column order listed below.
cf.flanders.2024.data <- dplyr::select(
  cf.flanders.2024.data,
  # Seat predictions per party
  seats_pvda, seats_groen, seats_vooruit, seats_cdv, seats_ovld, seats_nva,
  seats_vb,
  # Coalition predictions per party
  govt_pvda, govt_groen, govt_vooruit, govt_cdv, govt_ovld, govt_nva,
  govt_vb,
  # Vote intention dummies
  vote_pvda, vote_groen, vote_vooruit, vote_cdv, vote_ovld, vote_nva,
  vote_vb, vote_other, vote_none, vote_dk,
  # Respondent characteristics and survey identifiers
  follow, ideology, education, university, sex, age, survey, type, vote,
  # Winner status and overall accuracy measures
  seats_winner, govt_winner, seats_accuracy, govt_brier,
  # Per-party correctness of seat predictions
  seats_pvda_correct, seats_groen_correct, seats_vooruit_correct,
  seats_cdv_correct, seats_ovld_correct, seats_nva_correct, seats_vb_correct,
  # Per-party Brier scores of coalition predictions
  govt_pvda_brier, govt_groen_brier, govt_vooruit_brier, govt_cdv_brier,
  govt_ovld_brier, govt_nva_brier, govt_vb_brier,
  # Sophistication index
  sophistication_index
)

################################################################################
## 2.6. EXPORT CLEANED DATA (2023, 2024, combined)
################################################################################

# NOTE: Exports are written to the working directory.
#       Use these files as the standardized inputs for modelling / visualization scripts.


# Save the cleaned 2023 data frame in Stata (.dta) and R (.rds) format
write_dta(cf.flanders.2023.data, path = "cf_flanders_2023_data.dta")
saveRDS(cf.flanders.2023.data, file = "cf_flanders_2023_data.rds")

# Save the cleaned 2024 data frame in Stata (.dta) and R (.rds) format
write_dta(cf.flanders.2024.data, path = "cf_flanders_2024_data.dta")
saveRDS(cf.flanders.2024.data, file = "cf_flanders_2024_data.rds")

################################################################################
## 2.7. MERGE 2023 AND 2024 DATAFRAMES
################################################################################

# Stack the 2023 and 2024 data frames row-wise; a column that exists in only
# one wave is filled with NA for the rows of the other wave.
cf.flanders.data <- dplyr::bind_rows(
  list(cf.flanders.2023.data, cf.flanders.2024.data)
)

# Save the combined data frame in Stata (.dta) and R (.rds) format
write_dta(cf.flanders.data, path = "cf_flanders_data.dta")
saveRDS(cf.flanders.data, file = "cf_flanders_data.rds")

# =============================================================================
# END OF SCRIPT
# =============================================================================